In [ ]:
# JosephDiPietro.github.io
## CMSC320 Final Project
## Introduction
####Human Trafficking has always been a major problem in the world and it has devastating effects on its victims.  With new databases we can examine risk factors in order to help mitigate this problem and counter the abusers.  In this project I will be examing the age of the victims, knowledge of the abuser, type of abuse used for trafficking, and whether the victim was abducted or not.  The data that I will use comes from the CTDC and contains information from all over the world.  In order to keep a reasonable scope for this project the data has been pre-processed to include online United States cases.  These cases range from years 2015 to 2018.  Using data analytics we can predict whether abusers are more violent or have a closer relation to the victim based on factors like age.
In [241]:
import pandas as pd
import seaborn as sea
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import numpy as np
import folium
from folium.plugins import MarkerCluster
In [221]:
globalFrame = pd.read_csv("trafficking.csv")
globalFrame = globalFrame.drop(globalFrame.columns[0],1)
globalFrame = globalFrame.drop(["Datasource","ageBroad","majorityStatus","majorityEntry"],1)
globalFrame = globalFrame.rename(columns = {"yearOfRegistration":"year","majorityStatusAtExploit":"majorityStatus","meansOfControlDebtBondage":"DebtBondage",
                                           "meansOfControlTakesEarnings":"EarningsStolen","meansOfControlRestrictsFinancialAccess": "WithholdsMoney",
                                           "meansOfControlThreats":"Threats","meansOfControlPsychologicalAbuse":"PsychologicalAbuse",
                                           "meansOfControlPhysicalAbuse":"PhysicalAbuse","meansOfControlSexualAbuse":"SexualAbuse",
                                           "meansOfControlFalsePromises":"FalsePromises","meansOfControlPsychoactiveSubstances":"PsychoactiveSubstances",
                                           "meansOfControlRestrictsMovement":"RestrictsMovement","meansOfControlRestrictsMedicalCare":"RestrictsMedicalCare",
                                           "meansOfControlExcessiveWorkingHours":"ExcessiveWorkingHours","meansOfControlUsesChildren":"UsesChildren",
                                           "meansOfControlThreatOfLawEnforcement":"ThreatOfLawEnforcement","meansOfControlWithholdsNecessities":"WithholdsNecessities",
                                           "meansOfControlWithholdsDocuments":"WithholdsDocuments","meansOfControlOther":"OtherControl","meansOfControlNotSpecified":"ControlNotSpecified",
                                           "recruiterRelationIntimatePartner":"IntimatePartner","recruiterRelationFriend":"Friend","recruiterRelationFamily":"Family",
                                           "recruiterRelationOther":"OtherRelation","recruiterRelationUnknown":"UnknownRelation"})
frame = globalFrame[globalFrame['citizenship'] == "US"]
frame = frame.reset_index()
frame = frame.drop('index',1)
frame.head()
frame["Cases"] = 1
frame.loc[frame['majorityStatus'] == '0', 'majorityStatus'] = "unkown"
ageFrame = frame.groupby(["majorityStatus","year"]).sum()
ageFrame['Cases'].plot.bar()
ageFrame = ageFrame.reset_index()
ageFrame
/opt/conda/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3145: DtypeWarning: Columns (6,55,57) have mixed types.Specify dtype option on import or set low_memory=False.
  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
Out[221]:
majorityStatus year DebtBondage EarningsStolen WithholdsMoney Threats PsychologicalAbuse PhysicalAbuse SexualAbuse FalsePromises ... typeOfSexPornography typeOfSexRemoteInteractiveServices typeOfSexPrivateSexualServices isAbduction IntimatePartner Friend Family OtherRelation UnknownRelation Cases
0 Adult 2015 5 12 0 13 20 15 4 4 ... 0 0 0 0 19 3 0 3 16 41
1 Adult 2016 12 21 8 45 47 45 29 20 ... 0 0 0 0 41 16 3 21 60 133
2 Adult 2017 10 20 1 63 50 51 21 9 ... 0 0 0 0 43 11 2 15 70 137
3 Adult 2018 5 16 2 37 38 28 11 8 ... 0 0 0 0 31 12 1 12 23 76
4 Minor 2015 13 36 4 56 101 72 42 10 ... 0 0 0 0 63 26 47 34 169 320
5 Minor 2016 21 43 6 111 140 118 76 20 ... 0 0 0 0 90 49 61 52 345 575
6 Minor 2017 20 44 5 109 149 104 56 18 ... 0 0 0 0 69 28 83 34 263 467
7 Minor 2018 6 17 3 67 107 70 65 3 ... 0 0 0 0 37 16 81 17 119 263
8 unkown 2015 13 18 2 39 56 46 11 4 ... 0 0 0 0 32 5 14 16 209 271
9 unkown 2016 13 40 12 80 114 119 29 15 ... 0 0 0 0 84 12 16 12 411 531
10 unkown 2017 27 36 6 122 107 107 37 7 ... 0 0 0 0 66 13 14 9 385 482
11 unkown 2018 11 27 3 81 98 96 41 11 ... 0 0 0 0 73 13 16 11 229 340

12 rows × 52 columns

In [211]:
ageFrame = ageFrame[ageFrame['majorityStatus'] != "unkown"]
physcialAbuse = sea.scatterplot(x = "year",y = "PhysicalAbuse",hue = "majorityStatus",data = ageFrame)
plt.show()
plt.clf()
sexualAbuse = sea.scatterplot(x = "year",y = "SexualAbuse",hue = "majorityStatus",data = ageFrame)
plt.show()
plt.clf()
pschologicalAbuse = sea.scatterplot(x = "year",y = "PsychologicalAbuse",hue = "majorityStatus",data = ageFrame)
plt.show()
plt.clf()
psychoactiveDrugs = sea.scatterplot(x = "year",y = "PsychoactiveSubstances",hue = "majorityStatus",data = ageFrame)
plt.show()
plt.clf()
<Figure size 432x288 with 0 Axes>
In [242]:
predictFrame = frame[frame["majorityStatus"] != "unkown"]
for index,row in predictFrame.iterrows():
    if(row.majorityStatus == "Minor"):
        predictFrame.loc[index,"BinaryAge"] = 1
    else:
        predictFrame.loc[index,"BinaryAge"] = 0
predictAge = predictFrame[["year","BinaryAge"]]
model = LogisticRegression()
model.fit(X = predictAge.drop("BinaryAge",1),y = predictAbuse["BinaryAge"])
model.predict(np.array([[2017]]))
predictFrame
/opt/conda/lib/python3.8/site-packages/pandas/core/indexing.py:1596: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
/opt/conda/lib/python3.8/site-packages/pandas/core/indexing.py:1765: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
Out[242]:
year gender majorityStatus citizenship DebtBondage EarningsStolen WithholdsMoney Threats PsychologicalAbuse PhysicalAbuse ... isAbduction RecruiterRelationship CountryOfExploitation IntimatePartner Friend Family OtherRelation UnknownRelation Cases BinaryAge
0 2015 Female Minor US 0 0 0 0 0 0 ... 0 Intimate Partner US 1 0 0 0 0 1 1.0
1 2015 Female Adult US 0 0 0 0 1 0 ... 0 Not Specified US 0 0 0 0 1 1 0.0
2 2015 Female Minor US 0 0 0 1 0 1 ... 0 Friend/Acquaintance US 0 1 0 0 0 1 1.0
6 2015 Female Minor US 0 0 0 1 0 0 ... 0 Other US 0 0 0 1 0 1 1.0
7 2015 Female Minor US 1 1 0 0 1 1 ... 0 Intimate Partner US 1 0 0 0 0 1 1.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
3631 2018 Male Minor US 0 0 0 0 1 0 ... 0 Family/Relative US 0 0 1 0 0 1 1.0
3632 2018 Male Minor US 0 0 0 1 1 1 ... 0 Family/Relative US 0 0 1 0 0 1 1.0
3633 2018 Male Minor US 0 0 0 1 1 1 ... 0 Family/Relative US 0 0 1 0 0 1 1.0
3634 2018 Male Minor US 0 0 0 0 0 0 ... 0 Family/Relative US 0 0 1 0 0 1 1.0
3635 2018 Male Minor US 0 0 0 0 1 1 ... 0 Family/Relative US 0 0 1 0 0 1 1.0

2012 rows × 61 columns

In [253]:
predictAbuse = predictFrame[["year","DebtBondage","EarningsStolen","WithholdsMoney","Threats","PhysicalAbuse","SexualAbuse","FalsePromises",
                            "PsychoactiveSubstances","RestrictsMovement","RestrictsMedicalCare","ExcessiveWorkingHours",
                            "UsesChildren","ThreatOfLawEnforcement","WithholdsNecessities","WithholdsDocuments",
                            "OtherControl","ControlNotSpecified","BinaryAge"]]
model = LinearRegression()
model.fit(X = predictAbuse.drop("BinaryAge",1),y = predictAbuse["BinaryAge"])
print("A victim of threats with 1 being a minor in 2017 is",model.predict(np.array([[2017,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0]])))
A victim of threats with 1 being a minor in 2017 is [0.77239109]
In [251]:
predictRecruit = predictFrame[["year","IntimatePartner","Family","OtherRelation","UnknownRelation","BinaryAge"]]
model = LinearRegression()
model.fit(X = predictRecruit.drop("BinaryAge",1),y = predictAbuse["BinaryAge"])
print("A victim of a family member with one being a minor in 2017 is",model.predict(np.array([[2017,0,1,0,0]])))
print("A victim of an intimate partner with one being a minor in 2015 is",model.predict(np.array([[2015,1,0,0,0]])))
A victim of a family member with one being a minor in 2017 is [0.96826561]
A victim of an intimate partner with one being a minor in 2015 is [0.71835606]
In [248]:
m = folium.Map()
cluster = MarkerCluster().add_to(m)
for index,row in globalFrame.iterrows():
    if(row.citizenship == "CO"):
        folium.map.Marker(location = ['4.5709','-74.2973'],popup = row.citizenship).add_to(cluster)
    if(row.citizenship == "MD"):
        folium.map.Marker(location = ['47.4116','28.3699'],popup = row.citizenship).add_to(cluster)
    if(row.citizenship == "RO"):
        folium.map.Marker(location = ['45.9432','24.9668'],popup = row.citizenship).add_to(cluster)
    if(row.citizenship == "UA"):
        folium.map.Marker(location = ['48.3794','31.1656'],popup = row.citizenship).add_to(cluster)
    if(row.citizenship == "BY"):
        folium.map.Marker(location = ['53.7098','27.9534'],popup = row.citizenship).add_to(cluster)
    if(row.citizenship == "HT"):
        folium.map.Marker(location = ['18.9712','-72.2852'],popup = row.citizenship).add_to(cluster)
    if(row.citizenship == "UZ"):
        folium.map.Marker(location = ['41.3775','64.5853'],popup = row.citizenship).add_to(cluster)
    if(row.citizenship == "LK"):
        folium.map.Marker(location = ['7.8731','80.7718'],popup = row.citizenship).add_to(cluster)
    if(row.citizenship == "LK"):
        folium.map.Marker(location = ['7.8731','80.7718'],popup = row.citizenship).add_to(cluster)
    if(row.citizenship == "MM"):
        folium.map.Marker(location = ['21.9162','95.9560'],popup = row.citizenship).add_to(cluster)
    if(row.citizenship == "UG"):
        folium.map.Marker(location = ['1.3733','32.2903'],popup = row.citizenship).add_to(cluster)
    if(row.citizenship == "ID"):
        folium.map.Marker(location = ['-0.7893','113.9213'],popup = row.citizenship).add_to(cluster)
    if(row.citizenship == "ID"):
        folium.map.Marker(location = ['-0.7893','113.9213'],popup = row.citizenship).add_to(cluster)
    if(row.citizenship == "KG"):
        folium.map.Marker(location = ['42.882004','74.582748'],popup = row.citizenship).add_to(cluster)
    if(row.citizenship == "AF"):
        folium.map.Marker(location = ['33.9391','67.7100'],popup = row.citizenship).add_to(cluster)
    if(row.citizenship == "ER"):
        folium.map.Marker(location = ['15.1794','39.7823'],popup = row.citizenship).add_to(cluster)
    if(row.citizenship == "NG"):
        folium.map.Marker(location = ['17.6078','8.0817'],popup = row.citizenship).add_to(cluster)
    if(row.citizenship == "NP"):
        folium.map.Marker(location = ['28.3949','84.1240'],popup = row.citizenship).add_to(cluster)
    if(row.citizenship == "PH"):
        folium.map.Marker(location = ['12.8797','121.7740'],popup = row.citizenship).add_to(cluster)
    if(row.citizenship == "KH"):
        folium.map.Marker(location = ['12.5657','104.9910'],popup = row.citizenship).add_to(cluster)
    if(row.citizenship == "BD"):
        folium.map.Marker(location = ['23.6850','90.3563'],popup = row.citizenship).add_to(cluster)
    if(row.citizenship == "US"):
        folium.map.Marker(location = ['37.0902','-95.7129'],popup = row.citizenship).add_to(cluster)
    if(row.citizenship == "TH"):
        folium.map.Marker(location = ['15.8700','100.9925'],popup = row.citizenship).add_to(cluster)
    if(row.citizenship == "VN"):
        folium.map.Marker(location = ['14.0583','108.2772'],popup = row.citizenship).add_to(cluster)
m
Out[248]:
Make this Notebook Trusted to load map: File -> Trust Notebook